Set up

library(data.table)
library(tidyverse)
library(smacof)
library(DESeq2)
library(InteractionSet)
library(diffHic)
library(broom)
library(highcharter)
library(heatmaply)
library(plotly)
library(eulerr)
library(reactable)
# ("data.table", "tidyverse", "smacof", "DESeq2", "InteractionSet", "diffHic",
#       "broom", "highcharter", "heatmaply", "plotly", "eulerr")
# set your working directory
setwd("~/Documents/HGEN_663/extra/lec12")

Import

load('lec12.RData')

Contact matrices

There’s a number of ways through which we can examine the correlation matrix

Hierarchical clustering

tibble(s = names(hr)) %>%
  mutate(type = factor(sub('_.*', '', s))) %>%
  column_to_rownames('s') %>%
  heatmaply(hr, row_side_colors = ., col_side_colors = .,
            label_names = c("Sample 1", "Sample 2", "SCC"))

Multidimensional scaling

sim2diss(hr, method = 'corr') %>%
  mds(ndim = 2) %>%
  .$conf %>%
  as.data.frame() %>%
  rownames_to_column('s') %>%
  mutate(type = sub('_.*', '', s)) %>%
  plot_ly(x = ~D1, y = ~D2, color = ~type, text = ~s) %>%
  add_markers(legendgroup = ~type) %>%
  add_text(textposition = 'top left', showlegend = F, legendgroup = ~type)

P(s)

For the contact probability decay curves and their derivatives that we’ve computed before, we can again just simply visualize them

# colors for each cell type
hclrs <- c(ESC = "#7cb5ec", NPC = "#434348")

# plot data
pd <- ps %>%
  lapply(function(x) {
    d <- rbindlist(x, idcol = 'samp')
    ifelse('slope' %in% names(d), 'slope', 'balanced.avg') %>%
      c('samp', 's_bp', .) %>%
      d[, ., with = F] %>%
      `colnames<-`(c('samp', 'x', 'y'))
  }) %>%
  rbindlist(idcol = 'kind') %>%
  group_by(kind, samp) %>%
  do(data = list_parse2(data.frame(.$x, .$y))) %>%
  ungroup() %>%
  separate(samp, c('ctype', 'rep'), '_', F, fill = 'right') %>%
  #dplyr::filter(is.na(rep)) %>%
  mutate(color = hclrs[ctype],
         ctype = factor(ctype, names(hclrs))) %>%
  arrange(ctype) %>%
  mutate(name = samp,
         id = ifelse(kind != 'log', tolower(name), NA),
         linkedTo = ifelse(kind == 'log', tolower(name), NA),
         yAxis = ifelse(kind == 'log', 0, 2))

highchart() %>%
  # we use log scale for P(s) and linear for the slope (which was already taken in log space)
  hc_yAxis_multiples(list(title = list(text = "<b>Contact probability</b>, P<sub>c</sub>(s)",
                                       useHTML = TRUE),
                          type = "logarithmic",
                          labels = list(formatter = JS("function(){return this.value.toExponential(0);}")),
                          height = '45%', top = '0%', offset = 0),
                     list(height = '10%', top = '45%',
                          title = NULL,
                          plotLines = list(
                            list(color = "#a9a9a9", width = 2,
                                 value = .5, zIndex = 1)
                          ),
                          labels = list(enabled = F),
                          gridLineWidth = 0,
                          min = 0, max = 1),
                     list(type = "linear",
                          title = list(
                            text = "<b>Slope</b>, <sup>d</sup>&frasl;<sub>ds</sub> log P<sub>c</sub>(s)",
                            useHTML = TRUE
                          ),
                          height = '45%', top = '55%', offset = 0)) %>%
  # grab the data
  hc_add_series_list(pd) %>%
  # a bit of formatting
  hc_xAxis(type = "logarithmic",
           title = list(text = "<b>Genomic separation</b> (bp), s",
                        useHTML = T),
           minorTickInterval = 'auto',
           min = 1e4, max = 1e8) %>%
  hc_tooltip(headerFormat = '<span style="font-size: 10px">{point.key:,.0f} bp</span><br/>') %>%
  hc_chart(zoomType = "xy") %>%
  hc_plotOptions(line = list(marker = list(enabled = F)))

Compartments

Next we’ll check out the first eigenvector tracks (i.e., “compartment score”)

d <- lapply(dat, `[[`, 'E1') %>% bind_cols() %>% na.omit()
x <- d$ESC
y <- d$NPC
# edges
lnks <- d %>%
  mutate(across(everything(), function(x) ifelse(x > 0, 'A', 'B'))) %>%
  `colnames<-`(c('from', 'to')) %>%
  dplyr::count(from, to, name = 'weight') %>%
  mutate(from = sprintf('%s.%s', 'ESC', from),
         to = sprintf('%s.%s', 'NPC', to),
         weight = 100 * weight / sum(weight)) %>%
  transpose() 

# order of cell types
odr <- names(hclrs)

# nodes
nds <- lapply(lnks, function(x) c(x$from, x$to)) %>%
  unlist() %>%
  unique() %>%
  lapply(function(x) {
    tibble(id = x) %>%
      separate(id, c('samp', 'name'), '\\.', F) %>%
      mutate(column = which(odr == samp) - 1,
             color = ifelse(name == 'A', '#6AA56E', '#B65655')) %>%
      as.list()
  })

# show as sankey
highchart() %>%
  hc_chart(type = 'sankey') %>%
  hc_add_series(data = lnks, nodes = nds) %>%
  hc_xAxis(tickPositions = c(-0.49, 3.5),
           startOnTicks = F,
           endOnTicks = F,
           lineColor = 'transparent',
           labels = list(formatter = JS("function() {
               var d;
               if (this.value < 0) {
                   d = 'ES';
               } else if (this.value > 3) {
                   d = 'NPC';
               }
               return d;
           }"), align = 'center'),
           tickLength = 0) %>%
  hc_chart(showAxes = T) %>%
  hc_tooltip(nodeFormatter = JS("function() {
                 return '<span style=\"font-size: 10px\">' + this.options.samp +
                     '</span><br/>' + this.options.name + ': ' + 
                     Highcharts.numberFormat(this.sum, 1) + '%';
             }"),
             pointFormatter = JS("function() {
                 return '<span style=\"font-size: 10px\">' +
                     this.fromNode.options.samp + '&#8594;' +
                     this.toNode.options.samp + '</span><br/>' +
                     this.fromNode.options.name + '&#8594;' +
                     this.toNode.options.name + ': ' +
                     Highcharts.numberFormat(this.weight, 1) + '%';
             }"),
             headerFormat = '',
             useHTML = T)

Insulation

For the simplest comparison we can just count the number of shared boundaries

bdrs <- ins[c('ESC', 'NPC')] %>%
  lapply(function(x) {
    na.omit(x) %>%
      dplyr::filter(boundary_strength_100000 > .5) %>%
      mutate(start = start + 1) %>%
      makeGRangesFromDataFrame()
  })

Reduce(c, bdrs) %>% 
  unique() %>%
  {lapply(bdrs, function(x) overlapsAny(., x))} %>%
  bind_cols() %>%
  euler() %>%
  plot(quantities = T)


Loops

Called loops

These are loops called separately in each sample. There are specific classes in R that handles paired ranges, one of which is GInteractions

lps <- dots[c('ESC', 'NPC')] %>%
  lapply(function(x) {
    list(x[,1:3], x[,4:6]) %>%
      lapply(function(y) {
        y %>%
          `colnames<-`(c('chr', 'start', 'end')) %>%
          mutate(start = start + 1) %>%
          makeGRangesFromDataFrame()
      }) %>%
      {GInteractions(.[[1]], .[[2]], mode = 'reverse')}
  })

ESC

lps$ESC 
## ReverseStrictGInteractions object with 3738 interactions and 0 metadata columns:
##          seqnames1             ranges1     seqnames2             ranges2
##              <Rle>           <IRanges>         <Rle>           <IRanges>
##      [1]      chr1     4750001-4775000 ---      chr1     4500001-4525000
##      [2]      chr1     5000001-5025000 ---      chr1     4475001-4500000
##      [3]      chr1     5900001-5925000 ---      chr1     4475001-4500000
##      [4]      chr1     5900001-5925000 ---      chr1     4750001-4775000
##      [5]      chr1     5900001-5925000 ---      chr1     4900001-4925000
##      ...       ...                 ... ...       ...                 ...
##   [3734]      chrX 167800001-167825000 ---      chrX 167300001-167325000
##   [3735]      chrX 167800001-167825000 ---      chrX 167475001-167500000
##   [3736]      chrX 168925001-168950000 ---      chrX 168400001-168425000
##   [3737]      chrX 168925001-168950000 ---      chrX 168700001-168725000
##   [3738]      chrX 169275001-169300000 ---      chrX 168925001-168950000
##   -------
##   regions: 5537 ranges and 0 metadata columns
##   seqinfo: 20 sequences from an unspecified genome; no seqlengths

NPC

lps$NPC
## ReverseStrictGInteractions object with 4394 interactions and 0 metadata columns:
##          seqnames1             ranges1     seqnames2             ranges2
##              <Rle>           <IRanges>         <Rle>           <IRanges>
##      [1]      chr1     4750001-4775000 ---      chr1     4500001-4525000
##      [2]      chr1     5900001-5925000 ---      chr1     4750001-4775000
##      [3]      chr1     5900001-5925000 ---      chr1     5100001-5125000
##      [4]      chr1     6125001-6150000 ---      chr1     5100001-5125000
##      [5]      chr1     6125001-6150000 ---      chr1     5900001-5925000
##      ...       ...                 ... ...       ...                 ...
##   [4390]      chrX 167225001-167250000 ---      chrX 166650001-166675000
##   [4391]      chrX 167200001-167225000 ---      chrX 166800001-166825000
##   [4392]      chrX 168925001-168950000 ---      chrX 167275001-167300000
##   [4393]      chrX 168925001-168950000 ---      chrX 168650001-168675000
##   [4394]      chrX 169775001-169800000 ---      chrX 169325001-169350000
##   -------
##   regions: 6117 ranges and 0 metadata columns
##   seqinfo: 20 sequences from an unspecified genome; no seqlengths

Differential loop calling

diff_loop <- fread("output.pareidolia.tsv")
diff_loop %>% reactable()

Extra

Epigenetic correlation

dat$ESC %>% 
  bind_cols() %>% 
  na.omit() %>%
  mutate(across(-E1, log10)) %>% 
  mutate(across(-E1, function(x) x - Input)) %>% 
  dplyr::filter(is.finite(rowSums(.))) %>% 
  dplyr::select(-Input) %>% 
  cor() %>%
  heatmaply_cor()